{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a20d7d29-4c93-4901-9967-bcdfebb958dd",
   "metadata": {},
   "source": [
    "# SWIFT在【多模态领域】的应用"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0b03d07c-b019-45ba-b06d-9ef5ddab30ce",
   "metadata": {},
   "source": [
    "#### 使用样例见：https://github.com/modelscope/swift/tree/main/examples/pytorch/multi_modal/notebook"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3e72ce3a-bf5a-45d4-9dc0-b1d17be4ec73",
   "metadata": {},
   "source": [
    "## 1. 文本生成图像"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6ad5a1bb-24e7-4281-9e8d-243ba2fa7ef9",
   "metadata": {},
   "source": [
    "### 1.1 安装与导入包"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "50c08154-69fb-4b2f-96d4-54ee9a4a8a4d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# basic / third-party\n",
    "import os\n",
    "from  matplotlib import pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "# SWIFT\n",
    "from swift import Swift, SwiftModel, snapshot_download, push_to_hub\n",
    "from swift import AdapterConfig, LoRAConfig, PromptConfig, SideConfig, ResTuningConfig\n",
    "\n",
    "# Modelscope\n",
    "import modelscope\n",
    "from modelscope.pipelines import pipeline\n",
    "from modelscope.models import Model\n",
    "from modelscope.utils.config import Config\n",
    "from modelscope.metainfo import Trainers\n",
    "from modelscope.msdatasets import MsDataset\n",
    "from modelscope.trainers import build_trainer\n",
    "from modelscope.utils.constant import ModelFile\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b91d8889-1fda-4f68-bb17-b18e38f18009",
   "metadata": {},
   "source": [
    "### 1.2 Stable Diffusion\n",
    "\n",
    "High-Resolution Image Synthesis with Latent Diffusion Models\n",
    "\n",
    "<img src=\"resources/images/sd.jpg\" width=\"800\" align=\"middle\" />"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "79486a84-6f9a-4aff-b338-4365b0f9037a",
   "metadata": {},
   "source": [
    "### 1.3 特定主体训练"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2297501c-9f4c-471d-a455-037db929e150",
   "metadata": {},
   "source": [
    "### 1.3.1 数据集\n",
    "- Dogs: https://modelscope.cn/datasets/buptwq/lora-stable-diffusion-finetune/summary\n",
    "\n",
    "<img src=\"resources/images/dog.jpeg\" width=\"200\" align=\"middle\" />\n",
    "\n",
    "- 加载数据集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "88880eb4-8d8d-4ca4-b9ac-5de6fa875e0a",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_dataset = MsDataset.load(\n",
    "        'buptwq/lora-stable-diffusion-finetune',\n",
    "        split='train',\n",
    "        ).remap_columns({'Text': 'prompt'})\n",
    "eval_dataset = MsDataset.load(\n",
    "        'buptwq/lora-stable-diffusion-finetune',\n",
    "        split='validation',\n",
    "        ).remap_columns({'Text': 'prompt'})"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "997d9799-f05f-4b28-aaa4-628ba11e4551",
   "metadata": {},
   "source": [
    "### 1.3.2 使用modelscope加载SD模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "15b214cf-0146-4bbe-b773-f8acbd2e1df5",
   "metadata": {},
   "outputs": [],
   "source": [
    "model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-base'\n",
    "task = 'efficient-diffusion-tuning'\n",
    "revision = 'v1.0.1'\n",
    "\n",
    "model_dir = snapshot_download(model_id)\n",
    "cfg_dict = Config.from_file(os.path.join(model_dir, ModelFile.CONFIGURATION))\n",
    "cfg_dict.model.inference = False\n",
    "model = Model.from_pretrained(model_id, cfg_dict=cfg_dict, revision=revision)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2ad596b0-add6-4050-a279-af07139484c7",
   "metadata": {},
   "source": [
    "#### 1.3.2 查看模型信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7a0a0f53-b04b-4b57-ac51-767bcf5412f9",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "print(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5532d110-ac97-4585-b22b-9c74db62680d",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "module_keys = [key for key, _ in model.named_modules()]\n",
    "print(module_keys)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "574e6445-5a4e-45d0-af87-496e66442d4b",
   "metadata": {},
   "source": [
    "#### 1.3.3 配置SwiftConfig + 模型准备"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3c847c3e-d27d-4639-8e4e-dcdd7b590ae6",
   "metadata": {
    "tags": []
   },
   "source": [
    "#### Swift - Transformer - LoRA\n",
    "\n",
    "<img src=\"resources/images/lora.png\" width=\"500\" align=\"middle\" />"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "05a27382-c660-4e54-9a2d-9598944d505e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# unet\n",
    "lora_config = LoRAConfig(\n",
    "    r=16,\n",
    "    target_modules=\".*unet.*.(to_q|to_k|to_v|to_out.0)$\"\n",
    ")\n",
    "model = Swift.prepare_model(model, lora_config)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4883fac3-ded7-4440-9954-8c74c3f8bc08",
   "metadata": {},
   "source": [
    "#### 1.3.4 查看微调模型信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "183cfa99-ff3c-4fd5-920e-1000c4590528",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "print(model)\n",
    "print(model.get_trainable_parameters())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8a46fac1-288a-4edd-a3d0-99cb5e961c5f",
   "metadata": {},
   "source": [
    "#### 1.3.5 训练"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "599424c8-dd42-426c-a3e2-5d2383908e92",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def cfg_modify_fn(cfg):\n",
    "    cfg.preprocessor.resolution = 512\n",
    "    cfg.train.lr_scheduler = {\n",
    "        'type': 'LambdaLR',\n",
    "        'lr_lambda': lambda _: 1,\n",
    "        'last_epoch': -1\n",
    "    }\n",
    "    cfg.train.max_epochs = 100\n",
    "    cfg.train.optimizer.lr = 1e-4\n",
    "    cfg.model.inference = False\n",
    "    cfg.model.pretrained_tuner = None\n",
    "    trainer_hook = cfg.train.hooks\n",
    "    trainer_hook.append({\"type\": \"SwiftHook\"})\n",
    "    cfg.train.hooks = trainer_hook\n",
    "    return cfg\n",
    "\n",
    "work_dir = \"tmp/multimodal_swift_lora_1\"\n",
    "kwargs = dict(\n",
    "    model=model,\n",
    "    cfg_file=os.path.join(model_dir, 'configuration.json'),\n",
    "    work_dir=work_dir,\n",
    "    train_dataset=train_dataset,\n",
    "    eval_dataset=train_dataset,\n",
    "    cfg_modify_fn=cfg_modify_fn\n",
    ")\n",
    "\n",
    "trainer = build_trainer(name='efficient-diffusion-tuning', default_args=kwargs)\n",
    "trainer.train()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "911494c9-0244-476d-adc9-4bfcddb4d6f8",
   "metadata": {},
   "source": [
    "#### 1.3.6 测试"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "86bb91d5-7df3-4728-98b4-4869a3da5c9f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 加载modelscope基础模型\n",
    "model_dir = snapshot_download(model_id)\n",
    "cfg_dict = Config.from_file(os.path.join(model_dir, ModelFile.CONFIGURATION))\n",
    "cfg_dict.model.inference = True\n",
    "model = Model.from_pretrained(model_id, cfg_dict=cfg_dict, revision=revision)\n",
    "\n",
    "# 创建modelscope pipeline进行推理\n",
    "pipe = pipeline(task=\"efficient-diffusion-tuning\", model=model)\n",
    "\n",
    "# 推理流程\n",
    "test_prompt = \"a dog\"\n",
    "img_out = pipe({'prompt': test_prompt}, num_inference_steps=50, generator_seed=123)['output_imgs'][0][:,:,::-1]\n",
    "\n",
    "# 展示图片\n",
    "plt.xticks([]), plt.yticks([])\n",
    "plt.imshow(img_out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "49eda830-fb29-422f-85f2-750e91c09f99",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 加载Swift-Tuner模型参数\n",
    "model = Swift.from_pretrained(model, os.path.join(work_dir, 'output_swift'))\n",
    "\n",
    "pipe = pipeline(task=\"efficient-diffusion-tuning\", model=model)\n",
    "\n",
    "test_prompt = \"a dog\"\n",
    "img_out = pipe({'prompt': test_prompt}, num_inference_steps=50, generator_seed=123)['output_imgs'][0][:,:,::-1]\n",
    "plt.xticks([]), plt.yticks([])\n",
    "plt.imshow(img_out)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "85bdb758-190e-49e5-aad8-3a43702e9604",
   "metadata": {},
   "source": [
    "### 1.4 特定风格训练"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9c46868d-60a1-4448-a875-9362cb8f5701",
   "metadata": {},
   "source": [
    "### 1.4.1 数据集\n",
    "- Styles: https://modelscope.cn/datasets/damo/style_custom_dataset/summary\n",
    "\n",
    "<img src=\"resources/images/flatillustration.jpeg\" width=\"200\" align=\"middle\" />\n",
    "\n",
    "- 加载数据集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4afe304d-a274-4b5b-b3aa-96cd0e05fcc8",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_dataset = MsDataset.load(\n",
    "    'style_custom_dataset',\n",
    "    namespace='damo',\n",
    "    split='train',\n",
    "    subset_name='Flatillustration'\n",
    ").remap_columns({'Image:FILE': 'target:FILE'})"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "93090fc3-8d83-4064-a878-b5e826b5be4a",
   "metadata": {},
   "source": [
    "### 1.4.2 使用 modelscope + Swift 准备微调模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d8ec7adb-fc8e-4f01-ad9f-ca4614aa802e",
   "metadata": {},
   "outputs": [],
   "source": [
    "model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-base'\n",
    "task = 'efficient-diffusion-tuning'\n",
    "revision = 'v1.0.1'\n",
    "\n",
    "model_dir = snapshot_download(model_id)\n",
    "cfg_dict = Config.from_file(os.path.join(model_dir, ModelFile.CONFIGURATION))\n",
    "cfg_dict.model.inference = False\n",
    "model = Model.from_pretrained(model_id, cfg_dict=cfg_dict, revision=revision)\n",
    "\n",
    "# unet + text_encoder\n",
    "lora_config = LoRAConfig(\n",
    "    r=128,\n",
    "    lora_alpha=128,\n",
    "    target_modules=\"(.*unet.*.(to_q|to_k|to_v|to_out.0)$)|(.*text_encoder.*.(q_proj|k_proj|v_proj|out_proj)$)\"\n",
    ")\n",
    "model = Swift.prepare_model(model, lora_config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3a2ece30-12d5-4a78-b17e-c987af3fee44",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "print(model)\n",
    "print(model.get_trainable_parameters())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d704e2b5-5bb7-489d-b198-a80a1164aef1",
   "metadata": {},
   "source": [
    "#### 1.4.3 训练"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "648e4b79-4014-4f05-94f6-bae6fddee2d0",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def cfg_modify_fn(cfg):\n",
    "    cfg.preprocessor.resolution = 512\n",
    "    cfg.train.lr_scheduler = {\n",
    "        'type': 'LambdaLR',\n",
    "        'lr_lambda': lambda _: 1,\n",
    "        'last_epoch': -1\n",
    "    }\n",
    "    cfg.train.max_epochs = 100\n",
    "    cfg.train.optimizer.lr = 1e-4\n",
    "    cfg.model.inference = False\n",
    "    cfg.model.pretrained_tuner = None\n",
    "    trainer_hook = cfg.train.hooks\n",
    "    trainer_hook.append({\"type\": \"SwiftHook\"})\n",
    "    cfg.train.hooks = trainer_hook\n",
    "    return cfg\n",
    "\n",
    "work_dir = \"tmp/multimodal_swift_lora_2\"\n",
    "kwargs = dict(\n",
    "    model=model,\n",
    "    cfg_file=os.path.join(model_dir, 'configuration.json'),\n",
    "    work_dir=work_dir,\n",
    "    train_dataset=train_dataset,\n",
    "    cfg_modify_fn=cfg_modify_fn\n",
    ")\n",
    "\n",
    "trainer = build_trainer(name='efficient-diffusion-tuning', default_args=kwargs)\n",
    "trainer.train()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c56c2f36-5507-46f1-9c20-428c7fa367c5",
   "metadata": {},
   "source": [
    "#### 1.4.4 测试"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "53c02514-6697-4153-86e8-22b123597cc8",
   "metadata": {},
   "outputs": [],
   "source": [
    "model_dir = snapshot_download(model_id)\n",
    "cfg_dict = Config.from_file(os.path.join(model_dir, ModelFile.CONFIGURATION))\n",
    "cfg_dict.model.inference = True\n",
    "model = Model.from_pretrained(model_id, cfg_dict=cfg_dict, revision=revision)\n",
    "model = Swift.from_pretrained(model, os.path.join(work_dir, 'output_swift'))\n",
    "\n",
    "pipe = pipeline(task=\"efficient-diffusion-tuning\", model=model)\n",
    "\n",
    "test_prompt = \"a dog\"\n",
    "img_out = pipe({'prompt': test_prompt}, num_inference_steps=50, generator_seed=123)['output_imgs'][0][:,:,::-1]\n",
    "plt.xticks([]), plt.yticks([])\n",
    "plt.imshow(img_out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "05e53c3d-2a24-4102-8925-d057ba871ecd",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "aigc_env",
   "language": "python",
   "name": "aigc_env"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
